

library(tidyverse)
library(magrittr)
library(lubridate)
library(knitr)
library(lfe)
library(knitr)
library(stargazer)
library(sandwich)
library(multiwayvcov)

rootdir <- dirname(getwd())
scratchdir <- file.path(rootdir, "data", "scratch_data")
cachedir <- file.path(scratchdir, "cached_data")
embsdir <- file.path(scratchdir, "embs")
outdir <- file.path(rootdir, "TEX", "Tables")
outdir2 <- file.path(rootdir, "TEX", "Figures")

embs <- readRDS(file.path(cachedir, "embs_cleaned_withpoolsq.RDS"))
IDC <- readRDS(file.path(cachedir, "20210324_2009_2013_mnthly_IDC_prices.RDS"))
MBS_char_avg <- readRDS(file.path(cachedir, "20190506_LTV_size_from_embs_quartile.RDS"))

# seccur is to get MBS coupon
seccur <- read_csv(file.path(embsdir, "embs-seccur.csv"),
                   col_types = "dcidi")
pmms <- read_csv(file.path(cachedir, "PMMSR30_wkly.csv"))

# Data cleaning

#We keep 30yr pools. Keep both pools and pool-of-pools. For CLL, we will use 2009
#through 2012, for LTV, 2012 through 2013 (roughly HARP2.0 period), Freddie only.

IDC <- IDC %>%
  mutate(mnth_date = floor_date(date, "month")) %>%
  select(-date)

mnthly_pmms <- pmms %>%
  mutate(mnth_date = floor_date(DATE, "month")) %>%
  arrange(mnth_date, DATE) %>%
  group_by(mnth_date) %>%
  filter(row_number() == n()) %>%
  ungroup() %>%
  select(-DATE)


## Clean MBS characteristicredit_score
MBS_char <- MBS_char_avg %>%
  mutate(OSIZE = OSIZE/1000)

embs_pool <- embs %>%
  filter(type != "TBA" & bond_term_yr == 30) %>%
  left_join(MBS_char) %>%
  left_join(seccur[, c("cusip_id", "coupon")]) %>%
  left_join(mnthly_pmms, c("issue_date" = "mnth_date")) %>%
  filter(OSIZE <= 700 & OLTV <= 125)

embs_tba <- embs %>%
  filter(embs_productsuptype == "TBA" & bond_term_yr == 30) %>%
  inner_join(seccur[, c("cusip_id", "coupon")]) %>%
  mutate(tmp = substr(secmnem_id, 9, 11)) %>%
  mutate(settlement_mnth = match(tmp, toupper(month.abb))) %>%
  filter(substr(cusip_id, 1, 3) != "01R") %>%
  rename(TBA_cusip_id = cusip_id) %>%
  select(TBA_cusip_id, agency_id, coupon, settlement_mnth)

CLL_embs <- embs_pool %>%
  filter(type != "High_LTV") %>%
  filter(issue_date >= as.Date("2009-01-01") & issue_date <= as.Date("2012-12-31")) %>%
  filter(type == "TBA_eligible" | OSIZE > 417 ) %>%
  filter(type == "Jumbo" | OSIZE <= 417 ) %>%
  filter(OSIZE >= 250 )

LTV_embs <- embs_pool %>%
  filter(type != "Jumbo") %>%
  filter(issue_date >= as.Date("2012-01-01") & issue_date <= as.Date("2013-12-31")) %>%
  filter(OLTV >80 & OLTV <= 125) %>%
  filter(agency_id == "FHL")



#Look at the dominant coupons in each sample.

foo1 <- count(CLL_embs, coupon) %>%
  arrange(desc(n))

print("CLL sample")
print(foo1 %>% filter(row_number() <= 10))

foo2 <- count(LTV_embs, coupon) %>%
  arrange(desc(n))

print("LTV sample")
print(foo2 %>% filter(row_number() <= 10))

#Thus, 
#
#1. For CLL sample, we use 3.5 through 5
#1. For LTV sample, use 3 through 4.5

CLL_embs <- CLL_embs %>% filter(coupon %in% seq(3.5, 5, by = 0.5))
LTV_embs <- LTV_embs %>% filter(coupon %in% seq(3, 4.5, by = 0.5))

## Adjusted price

#We use first month price minus corresponding TBA price.
add_adj_price <- function(in_ds){
  foo <- in_ds %>%
    left_join(IDC, by = c("cusip_id", "issue_date" = "mnth_date")) %>%
    rename(firstmnth_price = price)

  foo <- foo %>%
    mutate(next_mnth_date = issue_date %m+% months(1)) %>%
    left_join(IDC, by = c("cusip_id", "next_mnth_date" = "mnth_date")) %>%
    rename(secondmnth_price = price) %>%
    mutate(price_date = if_else(!is.na(firstmnth_price), issue_date,
                               next_mnth_date)) %>%
    mutate(price = ifelse(!is.na(firstmnth_price), firstmnth_price,
                          secondmnth_price)) %>%
    mutate(settlement_mnth = month(next_mnth_date)) %>%
    select(-firstmnth_price, -secondmnth_price) %>%
    left_join(embs_tba)

  adj_price <- foo %>%
    left_join(IDC, by = c("TBA_cusip_id" = "cusip_id",
                          "price_date" = "mnth_date"),
              suffix = c("", ".TBA")) %>%
    mutate(adj_price = price - price.TBA)
  return(adj_price)
}

CLL_embs <- add_adj_price(CLL_embs)
LTV_embs <- add_adj_price(LTV_embs)

# CLL various windows
# CLL_sample2: 300-600
# CLL_sample3: 350-500

CLL_sample2 <- CLL_embs %>% filter(OSIZE >= 300 & OSIZE <= 600)
CLL_sample3 <- CLL_embs %>% filter(OSIZE >= 350 & OSIZE <= 500)

CLL_sample_list <- list(CLL_sample2, CLL_sample3)

# LTV various windows
# LTV_sample2: 85-120
# LTV_sample3: 90-120

LTV_sample2 <- LTV_embs %>% filter(OLTV >= 85 & OLTV <= 120)
LTV_sample3 <- LTV_embs %>% filter(OLTV >= 90 & OLTV <= 120)

LTV_sample_list <- list(LTV_sample2, LTV_sample3)


# use poly 1, one allows for separate slope, one doesn't

# separate slope
# size regression
run_size_reg <- function(in_ds){
  size_regdata <- in_ds %>%
    filter(!is.na(adj_price)) %>%
    mutate(issue_date = factor(issue_date)) %>%
    mutate(coupon_fac = factor(coupon)) %>%
    mutate(refi = original_wac - MORTGAGE30US) %>%
    mutate(adj_OSIZE = OSIZE - 417)
  if (size_regdata %>% filter(coupon == 5) %>% nrow() == 1){
    size_regdata <- size_regdata %>%
      filter(coupon != 5)
  }
  lmout <- lm(adj_price ~ type  +  type:adj_OSIZE 
              + coupon_fac*OLTV + coupon_fac*adj_OSIZE 
                 + refi + issue_date, data = size_regdata)
  vcov <- vcovHC(lmout)
  return(list(lmout = lmout, se = sqrt(diag(vcov))))
}

# LTV regression
run_ltv_reg <- function(in_ds){
  ltv_regdata <- in_ds %>%
    filter(!is.na(adj_price)) %>%
    mutate(issue_date = factor(issue_date)) %>%
    mutate(coupon_fac = factor(coupon)) %>%
    mutate(refi = original_wac - MORTGAGE30US) %>%
    mutate(adj_OLTV = OLTV - 105)
  lmout <- lm(adj_price ~ type  +  type:adj_OLTV 
              + coupon_fac*OSIZE + coupon_fac*adj_OLTV 
                 + refi +issue_date , data = ltv_regdata)
  vcov <- vcovHC(lmout)
  return(list(lmout = lmout, se = sqrt(diag(vcov))))
}

regout2_size <- map(CLL_sample_list, run_size_reg)
regout2_ltv <- map(LTV_sample_list, run_ltv_reg)

# Same slope
run_reg <- function(in_ds){
  regdata <- in_ds %>%
    filter(!is.na(adj_price)) %>%
    mutate(issue_date = factor(issue_date)) %>%
    mutate(coupon_fac = factor(coupon)) %>%
    mutate(refi = original_wac - MORTGAGE30US)
  lmout1 <- lm(adj_price ~ type + coupon_fac*OLTV+coupon_fac*OSIZE  + refi,
              data = regdata)
  vcov1 <- vcovHC(lmout1)
  return(list(lmout = lmout1, se = sqrt(diag(vcov1))))
}

regout1_size <- map(CLL_sample_list, run_reg)
regout1_ltv <- map(LTV_sample_list, run_reg)

# Organize output
# CLL first, and then LTV
# window first
regout_all <- list(regout1_size[[1]], regout2_size[[1]],
                   regout1_size[[2]], regout2_size[[2]],
                   regout1_ltv[[1]], regout2_ltv[[1]],
                   regout1_ltv[[2]], regout2_ltv[[2]])

lmout_all <- map(regout_all, ~.$lmout)
se_all <- map(regout_all, ~.$se)

stargazer(lmout_all,
          se = se_all,
          covariate.labels = c("TBA-eligible", "Refi"),
          keep = c("typeTBA_eligible","refi"),
          add.lines = list(
                      c("Window", "300-600K", "300-600K", "350-500K", "350-500K",
                        "85-120", "85-120", "90-120", "90-120"),
                      c("Separate slope", rep(c("No", "Yes"), 4)),
                      c("Issue date f.e.", rep("Yes", 8)),
                      c("Other controls", rep("Yes", 8))),
          omit.stat = c("ser", "f", "rsq"),
          float = FALSE,
          column.labels = c("CLL sample", "LTV sample"),
          column.separate = c(4, 4),
          dep.var.labels.include = FALSE,
          dep.var.caption = "",
          out = file.path(outdir, "20210514_MBS_type_price.tex"))

